mental_df =
read_csv("./data/mental_data.csv") %>%
janitor::clean_names() %>%
mutate(
any_mental_num = any_mental_num / 1000000,
any_mental_per = any_mental_per * 100,
ser_mental_num = ser_mental_num / 1000000,
ser_mental_per = ser_mental_per * 100,
state_abb = state.abb[match(state, state.name)],
region = state.region[match(state, state.name)]
) %>%
mutate(
state_abb = replace(state_abb, state == "District of Columbia", "DC"))
## Rows: 51 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state
## dbl (9): any_mental_per, any_mental_num, ser_mental_per, ser_mental_num, tee...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data cleaning
nhis =
read_csv("data/nhis_data01.csv") %>%
janitor::clean_names() %>%
mutate(
worrx = recode_factor(worrx,'1' = "no", '2' = "yes"),
worfreq = recode_factor(worfreq, '1' = "Daily", '2' = "Weekly",
'3' = "Monthly", '4' = "A few times a year",
'5' = "Never"),
worfeelevl = recode_factor(worfeelevl, '1' = "A lot",
'3' = "Somewhere between a little and a lot",
'2' = "A little"),
deprx = recode_factor(deprx, '1' = "no", '2' = "yes"),
depfreq = recode_factor(depfreq, '1' = "Daily", '2' = "Weekly",
'3' = "Monthly", '4' = "A few times a year",
'5' = "Never"),
depfeelevl = recode_factor(depfeelevl, '1' = "A lot",
'3' = "Somewhere between a little and a lot",
'2' = "A little")
)
## Rows: 468212 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): NHISHID, NHISPID, HHX, FMX, PX
## dbl (28): YEAR, SERIAL, STRATA, PSU, HHWEIGHT, PERNUM, PERWEIGHT, SAMPWEIGHT...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
recent_yr_df =
nhis %>%
filter(year>2019)
number of people reported taken medication for worried, nervous, or anxious feeings from 2015 to 2021.
nhis %>%
filter(year>=2015) %>%
drop_na(worrx) %>%
group_by(year, worrx) %>%
summarize(wor_num = n()) %>%
pivot_wider(
names_from = worrx,
values_from = wor_num
) %>%
mutate(
wor_percentage = yes/(no + yes)*100,
text_label = str_c(yes, " out of ", no + yes)
) %>%
plot_ly(
y = ~wor_percentage,
x = ~year,
color = ~year,
type = "bar",
colors = "viridis",
text = ~text_label) %>%
layout(title = "Percentage of people reported taken medication for worried, nervous, or anxious feeings each year",
xaxis = list (title = ""),
yaxis = list (title = "Percentage")
) %>%
hide_colorbar()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
Frequency of worries
nhis %>%
filter(year>=2015) %>%
drop_na(worfreq) %>%
group_by(year, worfreq) %>%
summarize(count = n()) %>%
group_by(year) %>%
summarize(
percentage=100 * count/sum(count),
sum_count = sum(count),
worfreq = worfreq,
count=count
) %>%
mutate(
text_label = str_c(count, " out of ", sum_count)
) %>%
plot_ly(
y = ~percentage,
x = ~year,
color = ~worfreq,
type = "bar",
colors = "viridis",
text = ~text_label
) %>%
layout(
xaxis = list (title = ""),
yaxis = list (title = "Percentage"),
barmode = 'stack',
legend = list(orientation = 'h')
)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
worry level distribution.
nhis %>%
filter(year>=2015) %>%
drop_na(worfeelevl) %>%
group_by(year, worfeelevl) %>%
summarize(count = n()) %>%
group_by(year) %>%
summarize(
percentage=100 * count/sum(count),
sum_count = sum(count),
worfeelevl = worfeelevl,
count=count
) %>%
mutate(
text_label = str_c(count, " out of ", sum_count)
) %>%
plot_ly(
y = ~percentage,
x = ~year,
color = ~worfeelevl,
type = "bar",
colors = "viridis",
text = ~text_label
) %>%
layout(
title = "Distribution of worry levels",
xaxis = list (title = ""),
yaxis = list (title = "Percentage"),
barmode = 'stack',
legend = list(orientation = 'h')
)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
logistic regression
whether taken medication for worried, nervous, or anxious feelings is associated with covid-19 adjusting for sex and family income level.
glm(worrx ~ sex + poverty + cvddiag,
family=binomial(link='logit'),
data = recent_yr_df) %>%
broom::tidy(conf.int = TRUE) %>%
mutate(
odds_ratio = exp(estimate),
low = exp(conf.low),
high = exp(conf.high)
) %>%
select(term, odds_ratio, low, high) %>%
knitr::kable(digits=2)
| term | odds_ratio | low | high |
|---|---|---|---|
| (Intercept) | 0.08 | 0.07 | 0.09 |
| sex | 2.09 | 1.98 | 2.20 |
| poverty | 0.98 | 0.98 | 0.99 |
| cvddiag | 1.07 | 1.03 | 1.12 |
number of people reported taken medication for depression from 2015 to 2021.
nhis %>%
filter(year>=2015) %>%
drop_na(deprx) %>%
group_by(year, deprx) %>%
summarize(dep_num = n()) %>%
pivot_wider(
names_from = deprx,
values_from = dep_num
) %>%
mutate(
dep_percentage = yes/(no + yes)*100,
text_label = str_c(yes, " out of ", no + yes)
) %>%
plot_ly(
y = ~dep_percentage,
x = ~year,
color = ~year,
type = "bar",
colors = "viridis",
text = ~text_label) %>%
layout(title = "Percentage of people reported taken medication for depression each year",
xaxis = list (title = ""),
yaxis = list (title = "Percentage")
) %>%
hide_colorbar()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
Frequency of feel depressed
nhis %>%
filter(year>=2015) %>%
drop_na(depfreq) %>%
group_by(year, depfreq) %>%
summarize(count = n()) %>%
group_by(year) %>%
summarize(
percentage=100 * count/sum(count),
sum_count = sum(count),
depfreq = depfreq,
count=count
) %>%
mutate(
text_label = str_c(count, " out of ", sum_count)
) %>%
plot_ly(
y = ~percentage,
x = ~year,
color = ~depfreq,
type = "bar",
colors = "viridis",
text = ~text_label
) %>%
layout(
title = "Distribution of frequency of feel depressed",
xaxis = list (title = ""),
yaxis = list (title = "Percentage"),
barmode = 'stack',
legend = list(orientation = 'h')
)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
depression level distribution.
nhis %>%
filter(year>=2015) %>%
drop_na(depfeelevl) %>%
group_by(year, depfeelevl) %>%
summarize(count = n()) %>%
group_by(year) %>%
mutate(
percentage = 100 * count/sum(count),
sum_count = sum(count),
text_label = str_c(count, " out of ", sum_count)
) %>%
plot_ly(
y = ~percentage,
x = ~year,
color = ~depfeelevl,
type = "bar",
colors = "viridis",
text = ~text_label
) %>%
layout(
title = "Distribution of depression levels",
xaxis = list (title = ""),
yaxis = list (title = "Percentage"),
barmode = 'stack',
legend = list(orientation = 'h')
)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
logistic regression
whether taken medication for depression is associated with covid-19 adjusting for sex and family income level.
glm(deprx ~ sex + poverty + cvddiag,
family=binomial(link='logit'),
data = recent_yr_df) %>%
broom::tidy(conf.int = TRUE) %>%
mutate(
odds_ratio = exp(estimate),
low = exp(conf.low),
high = exp(conf.high)
) %>%
select(term, odds_ratio, low, high) %>%
knitr::kable(digits=2)
| term | odds_ratio | low | high |
|---|---|---|---|
| (Intercept) | 0.08 | 0.07 | 0.09 |
| sex | 2.08 | 1.97 | 2.20 |
| poverty | 0.97 | 0.97 | 0.98 |
| cvddiag | 1.05 | 1.00 | 1.10 |